#!/usr/bin/env python
#coding=utf8
from __future__ import print_function
from __future__ import unicode_literals
import os
import os.path
import codecs
import chardet
from chardet.universaldetector import UniversalDetector

import argparse
# Command-line interface: one positional path plus a verbosity switch.
parser = argparse.ArgumentParser(
    description='Scan a directory tree, detect source files stored in a '
                'Chinese legacy encoding with chardet, and rewrite them '
                'in place as UTF-8.')
# nargs=1 keeps args.path a one-element list; the script reads it as args.path[0].
parser.add_argument('path', nargs=1,
                    help='root path to scan recursively')
parser.add_argument('-v', choices=('none', 'all'), default="none",
                    help="verbosity: 'all' also prints ignored files and the "
                         "detection result of every checked file "
                         "(default: none)")
args = parser.parse_args()

# Root given on the command line (argparse stores it as a one-element list).
path = args.path[0]

# Collect every file below the root. The joined file paths are never bound to
# `path`, so `path` keeps referring to the root for the rest of the script.
file_list = []
for root, dirs, files in os.walk(path):
    file_list.extend(os.path.join(root, name) for name in files)

# os.walk() yields nothing when the root is a regular file, so without this
# branch a single-file argument would be silently ignored.
if os.path.isfile(path):
    file_list.append(path)

# Extensions (with the leading dot) that are eligible for conversion.
allow_file_type = ['.' + ext for ext in 'cpp|c|py|java|txt|cc|go|h'.split('|')]

# Distinct extensions that actually occur among the collected files.
all_file_type = list(set(os.path.splitext(entry)[1] for entry in file_list))

print("all file type:", all_file_type)
print("allow file type:", allow_file_type)

# Extensions that were found on disk but are not in the allow list.
present_types = set(all_file_type)
permitted_types = set(allow_file_type)
print("ignore file type:", list(present_types - permitted_types))

# One chardet detector instance reused for every file; check() calls
# detector.reset() before feeding it a new file.
detector = UniversalDetector()

def check(name):
    """Detect the encoding of *name* and convert the file to UTF-8 if needed.

    Files whose extension is not listed in ``allow_file_type`` are skipped
    (and reported when ``-v all`` is given). For the remaining files the raw
    bytes are fed line by line to the shared ``detector`` until it is
    confident or the file ends. If the detected encoding is one of the
    Chinese encodings listed below, the file is rewritten in place through
    ``gbk_to_utf8``.

    Returns None. Errors raised while reading, detecting or converting the
    file propagate to the caller.
    """
    if os.path.splitext(name)[1] not in allow_file_type:
        if args.v == 'all':
            print("ignore", name)
        return

    detector.reset()
    # Use a context manager so the handle is closed even when detection stops
    # early or raises; the bare open() used before leaked one handle per file.
    with open(name, 'rb') as source:
        for line in source:
            detector.feed(line)
            if detector.done:
                break
    detector.close()

    result = detector.result
    if args.v == 'all':
        print(name, result)

    # NOTE(review): gbk_to_utf8(name) decodes as GBK whatever was detected, so
    # Big5 / EUC-TW / HZ / ISO-2022-CN hits may fail to decode or be decoded
    # incorrectly -- confirm whether these encodings should really be listed.
    if result['encoding'] in ['Big5', 'GB2312', 'GB18030', 'EUC-TW', 'HZ-GB-2312', 'ISO-2022-CN']:
        # No newline here: "success" (or the caller's error text) ends the line.
        print("start ", name, end='   ')
        gbk_to_utf8(name)
        print("success")

def gbk_to_utf8(name):
    """Rewrite the file *name* in place, transcoding its text from GBK to UTF-8.

    The whole file is decoded into memory before it is reopened for writing,
    so a decoding error is raised before the original content is truncated.

    Raises:
        UnicodeDecodeError: if the content is not valid GBK.
        IOError / OSError: if the file cannot be read or written.
    """
    # Both handles are closed explicitly; in particular the writer is flushed
    # to disk before the function returns instead of whenever it is collected.
    with codecs.open(name, 'r', 'gbk') as source:
        data = source.read()
    with codecs.open(name, 'w', 'utf8') as target:
        target.write(data)

# Run the check on every collected file. Any error is printed and the loop
# moves on, so one bad file does not abort the rest of the run.
for file_name in file_list:
    try:
        check(file_name)
    except Exception as err:
        print(err)
